data(us.cities)
# Get major cities for each sample region (state)
.states <- c("OR", "VT", "CO", "NC")
top.cities <- purrr::map_df(.states, function(s) {
out <- us.cities %>%
filter(country.etc==s) %>%
mutate(city = gsub(paste0(" ", s), "", name)) %>%
arrange(-pop)
if (s == "OR") {
out <- out %>%
head() %>%
filter(!(city %in% c("Gresham", "Hillsboro", "Corvallis",
"Beaverton", "Springfield")))
} else if (s == "CO") {
out <- out %>%
head() %>%
filter(!(city %in% c("Thornton", "Lakewood", "Aurora")))
} else if (s == "NC") {
out <- out %>%
head() %>%
filter(!(city %in% c("Greensboro", "Durham", "Fayetteville")))
} else {
out <- out %>% head()
}
out
})
# Load the map data
states <- map_data("state") %>%
filter(region %in% c("oregon", "north carolina", "colorado", "vermont"))
# Load your data
data.files <- list.files("data/final", full.names = T)
df <- purrr::map_df(data.files, readRDS)
caps.after.ws <- function(string) {
gsub("(?<=\\s)([a-z])", "\\U\\1", string, perl = T)
}
# Define a function to create a plot for each species
plot.for.species <- function(spec, st.abbr) {
st <- case_when(st.abbr == "CO" ~ "colorado",
st.abbr == "NC" ~ "north carolina",
st.abbr == "VT" ~ "vermont",
st.abbr == "OR" ~ "oregon",
T ~ "")
title <- caps.after.ws(paste(st.abbr, gsub("_", " ", spec),
"Observations, 2016-2019"))
p <- ggplot(data = states %>% filter(region == st)) +
geom_polygon(aes(x = long, y = lat, group = group),
fill = "#989875", color = "black") +
geom_point(data = df %>% filter(state == st.abbr & common.name == spec),
aes(x = lon, y = lat),
size=1, alpha=.5, fill = "red", shape=21) +
geom_point(data = top.cities %>% filter(country.etc == st.abbr),
aes(x=long, y=lat),
fill="gold", color="black", size=3.5, shape = 21) +
geom_text(data = top.cities %>% filter(country.etc == st.abbr),
aes(x=long, y=lat, label=city),
color="white", hjust=case_when(st.abbr=="NC"~.2,
st.abbr=="VT"~.65,
T~.5),
vjust=ifelse(st.abbr=="NC", -.65, 1.5),
size=4) +
coord_map() +
ggtitle(title) +
theme_minimal() +
theme(panel.background = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank())
data.table(
state=st.abbr,
species=spec,
plot=list(p)
)
}
spec.state <- expand.grid(unique(df$common.name), unique(df$state)) %>%
rename(spec=Var1, st.abbr=Var2)
# Create a list of plots
plots <- purrr::map2_df(spec.state$spec,
spec.state$st.abbr,
~plot.for.species(.x, .y))
# Plot Ruddy Duck plots
do.call(ggpubr::ggarrange,
c(plots[species == "Ruddy Duck"]$plot,
list(nrow=2, ncol=2)))
# Plot Belted Kingfisher plots
do.call(ggpubr::ggarrange,
c(plots[species == "Belted Kingfisher"]$plot,
list(nrow=2, ncol=2)))
# Plot Wild Turkey plots
do.call(ggpubr::ggarrange,
c(plots[species == "Wild Turkey"]$plot,
list(nrow=2, ncol=2)))
# Plot Sharp-Shinned Hawk plots
do.call(ggpubr::ggarrange,
c(plots[species == "Sharp-shinned Hawk"]$plot,
list(nrow=2, ncol=2)))
# Plot Downy Woodpecker Plots
do.call(ggpubr::ggarrange,
c(plots[species == "Downy Woodpecker"]$plot,
list(nrow=2, ncol=2)))
# Plot Cedar Waxwing Plots
do.call(ggpubr::ggarrange,
c(plots[species == "Cedar Waxwing"]$plot,
list(nrow=2, ncol=2)))
# Plot Sandhill Crane Plots
do.call(ggpubr::ggarrange,
c(plots[species == "Sandhill Crane"]$plot,
list(nrow=2, ncol=2)))
# Plot Sanderling Plots
do.call(ggpubr::ggarrange,
c(plots[species == "Sanderling"]$plot,
list(nrow=2, ncol=2)))
states <- c("CO", "NC", "OR", "VT")
r.files <- paste0("data/final_rasters/", states, ".tif")
r.list <- purrr::map(r.files, rast)
names(r.list) <- states
TODO: - terra::freq - terra::density -
terra::layerCor
r.df <- map_df(states, function(s) {
df <- r.list[[s]] %>% as.data.frame()
names(df) <- names(df) %>% gsub(paste0("_", s), "", .)
df %>% setDT()
df[, state := factor(s, levels=states)]
df[apply(df, 1, function(.x) !any(is.na(.x)))]
})
# Custom function to process factor levels
clean.levels <- function(x) {
# Remove non-alphanumeric characters and replace with underscores
x <- gsub("[^a-zA-Z0-9]", "_", x)
# Convert to lowercase
x <- tolower(x)
# Remove any leading or trailing underscores
x <- gsub("^_|_$", "", x)
x <- gsub("__", "_", x)
x <- gsub("NLCD_Land_", "", x)
return(x)
}
r.df[, NLCD_Land := factor(clean.levels(levels(NLCD_Land))[NLCD_Land])]
# Convert factor columns to dummy variables
df.dummies <- data.table(model.matrix(~ . - 1, data = r.df[, .(NLCD_Land, state)])) %>%
cbind(r.df[, -which(names(r.df) %in% c("NLCD_Land", "state")), with=F])
names(df.dummies) <- gsub("NLCD_Land", "", names(df.dummies))
# Ensure that there is more than one value per column (remove otherwise)
uniq.1 <- t( df.dummies[, lapply(.SD, uniqueN)]) %>%
as.data.frame() %>%
filter(V1 == 1) %>%
row.names()
if (length(uniq.1) >= 1) {
df.dummies <- df.dummies[, -which(names(df.dummies) %in% uniq.1), with=F]
}
pca.fit <- PCA(df.dummies, graph=F)
plot.PCA(pca.fit, choix="var")
res <- pca.fit$var$coord %>%
as.data.frame() %>%
mutate(var=as.factor(rownames(.))) %>%
select(var, everything()) %>%
as_tibble()
rownames(res) <- NULL
p.d1 <- ggplot(res, aes(x = reorder(var, Dim.1), y = Dim.1)) +
geom_bar(stat = "identity", fill="darkblue") +
coord_flip() + # Makes it a horizontal bar chart
labs(title = "Variable importance for Dim.1", y = "Importance", x = "Variable") +
theme_minimal()
p.d2 <- ggplot(res, aes(x = reorder(var, Dim.2), y = Dim.2)) +
geom_bar(stat = "identity", fill="darkred") +
coord_flip() + # Makes it a horizontal bar chart
labs(title = "Variable importance for Dim.2", y = "Importance", x = "Variable") +
theme_minimal()
ggpubr::ggarrange(plotlist=list(p.d1, p.d2), nrow=2, ncol=1)
famd.fit <- FAMD(r.df, graph=F)
ggpubr::ggarrange(plotlist=purrr::map(
c("var", "quanti", "quali"),
~plot.FAMD(famd.fit, choix=.x)),
ncol=1, nrow=3)
res <- famd.fit$var$coord %>%
as.data.frame() %>%
mutate(var=as.factor(rownames(.))) %>%
select(var, everything()) %>%
as_tibble()
rownames(res) <- NULL
p.d1 <- ggplot(res, aes(x = reorder(var, Dim.1), y = Dim.1)) +
geom_bar(stat = "identity", fill="darkblue") +
coord_flip() + # Makes it a horizontal bar chart
labs(title = "Variable importance for Dim.1", y = "Importance", x = "Variable") +
theme_minimal()
p.d2 <- ggplot(res, aes(x = reorder(var, Dim.2), y = Dim.2)) +
geom_bar(stat = "identity", fill="darkred") +
coord_flip() + # Makes it a horizontal bar chart
labs(title = "Variable importance for Dim.2", y = "Importance", x = "Variable") +
theme_minimal()
ggpubr::ggarrange(plotlist=list(p.d1, p.d2), nrow=2, ncol=1)
First, get all of the grid-cell geometries (based on the resolution of the rasters) as spatial dataframes. Also extract each cell’s centroid, and row/column index from the original raster.
# Function to compute bounding box from centroid
compute.bbox <- function(x, y, half.res.x, half.res.y) {
c(x - half.res.x, x + half.res.x, y - half.res.y, y + half.res.y)
}
# Function to generate a single POLYGON from the bounding box coordinates
make.polygon <- function(xmin, ymin, xmax, ymax) {
m <- matrix(c(xmin, ymin,
xmax, ymin,
xmax, ymax,
xmin, ymax,
xmin, ymin),
ncol = 2, byrow = T)
st_polygon(list(m))
}
get.grid.geoms <- function(r, .crs=NULL) {
# Calculate the centroids of each cell
centroids <- terra::xyFromCell(r, seq_len(ncell(r)))
# Get resolution / 2 for x & y
half.res.x <- res(r)[1] / 2
half.res.y <- res(r)[2] / 2
# Compute bounding box for each centroid
bboxes <- t(apply(centroids, 1, function(pt) {
compute.bbox(pt[1], pt[2], half.res.x, half.res.y)
}))
# Create dataframe
dt <- as.data.table(bboxes)
colnames(dt) <- c("xmin", "xmax", "ymin", "ymax")
dt[, `:=` (
# Add centroid lat/lon values to dataframe
lon=centroids[, 1],
lat=centroids[, 2],
# Add i (row) and j (column) indices
i=rowFromCell(r, 1:ncell(r)),
j=colFromCell(r, 1:ncell(r))
)]
# Convert centroids to spatial points
dt[, centroid := purrr::map2(lon, lat, ~st_point(cbind(.x, .y)))]
# Create bounding box polygons for all rows and assign to geometry column
dt[, bbox := purrr::pmap(.l=list(xmin, ymin, xmax, ymax),
.f=make.polygon)]
# Make spatial frame
df <- st_sf(dt,
bbox = st_sfc(dt$bbox, crs=st_crs(r)),
centroid = st_sfc(dt$centroid, crs=st_crs(r)))
# Update CRS
if (!is.null(.crs)) {
df <- st_transform(df, st_crs(.crs)) %>%
st_set_geometry("centroid") %>%
st_transform(st_crs(.crs)) %>%
st_set_geometry("bbox")
}
df %>% select(i, j, bbox, centroid)
}
.grids <- purrr::map(r.list, ~get.grid.geoms(.x, .crs=4326))
names(.grids) <- states
# See sample grid dataframe
.grids$NC
# TODO: Select pseudo-absence points
TODO: Include pseudo-absence data
stratified.split.idx <- function(df, p=0.7, lat.lon.bins=25) {
# Cut along lat/lon values to create grids (lat.bin & lon.bin)
# lat.lon.bins is the number of divisions you want
df$lat.bin <- cut(df$lat, breaks=lat.lon.bins, labels = F)
df$lon.bin <- cut(df$lon, breaks=lat.lon.bins, labels = F)
# Create a new variable combining the stratification variables
df %>%
mutate(strata = paste(lat.bin, lon.bin, common.name, state)) %>%
pull(strata) %>%
# Create the data partitions
createDataPartition(., p = p, list = F) %>%
suppressWarnings()
}
prepare.data <- function(df, p=.7, lat.lon.bins=25) {
train.index <- stratified.split.idx(df, p=p, lat.lon.bins = lat.lon.bins)
df.train <- df[train.index, ]
df.test <- df[-train.index, ]
list(train = df.train,
test = df.test,
index = train.index)
}
train.test <- prepare.data(df, .7)
train <- df[train.test$index,]
test <- df[-train.test$index,]
Each of the 20 different Land Cover Categories falls under a “parent” category (see National Land Cover Database Class Legend and Description).